/*
 *  lwk/mm/bootmem.c
 *
 *  Copyright (C) 1999 Ingo Molnar
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *
 *  simple boot-time physical memory area allocator and
 *  free memory collector. It's used to deal with reserved
 *  system memory and memory holes as well.
 */

#include <lwk/init.h>
#include <lwk/pfn.h>
#include <lwk/bootmem.h>
#include <lwk/params.h>
#include <lwk/log2.h>
#include <lwk/pmem.h>
#include <lwk/kmem.h>
#include <lwk/bitops.h>
#include <arch/io.h>

/**
 * Set to true once bootmem allocator has been destroyed.
 */
static bool bootmem_destroyed = false;

/**
 * Access to this subsystem has to be serialized externally.
 * (this is true for the boot process anyway)
 */


/**
 * Amount of system memory to reserve for use by the kernel. The first
 * kmem_size bytes of system memory [0, kmem_size) will be added to the
 * kernel memory pool. The remainder of system memory is left untouched by
 * the kernel and is available for use by applications.
 */
static unsigned long kmem_size = (1024 * 1024 * 64);  /* default is first 64 MB */
param(kmem_size, ulong);
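/*
 * For example (assuming the standard name=value boot parameter syntax),
 * passing "kmem_size=134217728" on the kernel command line would reserve
 * the first 128 MB for the kernel instead of the default 64 MB. Note
 * that mem_subsys_init() rounds non-power-of-two values up.
 */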


/**
 * The bootmem_data structure describing the system's boot memory.
 */
static bootmem_data_t __initdata bootmem_data;

/**
 * List of bootmem_data structures, each describing a section of
 * physical memory.
 */
static LIST_HEAD(bdata_list);

/**
 * Returns the number of _pages_ that will be allocated for the boot bitmap.
 */
unsigned long __init
bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long mapsize;

	mapsize = (pages+7)/8;				/* one bit per page, in bytes */
	mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;	/* round up to a full page */
	mapsize >>= PAGE_SHIFT;				/* convert bytes to pages */

	return mapsize;
}
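/*
 * A worked example, assuming 4 KB pages: for 1 GB of memory there are
 * 262144 pages, so the bitmap needs (262144+7)/8 = 32768 bytes, which
 * rounds up to exactly 8 pages; bootmem_bootmap_pages(262144) == 8.
 */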

/**
 * Links a newly created bootmem_data structure to the bdata_list.
 */
static void __init
link_bootmem(bootmem_data_t *bdata)
{
	bootmem_data_t *ent;
	if (list_empty(&bdata_list)) {
		list_add(&bdata->list, &bdata_list);
		return;
	}
	/* insert in order */
	list_for_each_entry(ent, &bdata_list, list) {
		if (bdata->node_boot_start < ent->node_boot_start) {
			list_add_tail(&bdata->list, &ent->list);
			return;
		}
	}
	list_add_tail(&bdata->list, &bdata_list);
	return;
}

/**
 * Called once to set up the allocator itself. 'mapstart', 'start', and
 * 'end' are page frame numbers; the bitmap is placed at page 'mapstart'.
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init
init_bootmem_core(
	bootmem_data_t	*bdata,
	unsigned long	mapstart,
	unsigned long	start,
	unsigned long	end
)
{
	unsigned long mapsize = ((end - start)+7)/8;

	mapsize = ALIGN(mapsize, sizeof(long));
	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
	bdata->node_boot_start = (start << PAGE_SHIFT);
	bdata->node_low_pfn = end;
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	return mapsize;
}

/**
 * Marks a particular physical memory range as unallocatable. The range
 * is rounded outward to page boundaries: partially reserved pages are
 * considered fully reserved.
 */
static void __init
reserve_bootmem_core(
	bootmem_data_t	*bdata,
	unsigned long	addr,
	unsigned long	size
)
{
	unsigned long sidx, eidx;
	unsigned long i;

	/*
	 * round up, partially reserved pages are considered
	 * fully reserved.
	 */
	BUG_ON(!size);
	BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
	BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);

	sidx = PFN_DOWN(addr - bdata->node_boot_start);
	eidx = PFN_UP(addr + size - bdata->node_boot_start);

	for (i = sidx; i < eidx; i++) {
		if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
			printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
		}
	}
}
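/*
 * Example of the rounding behavior, assuming 4 KB pages and
 * node_boot_start == 0: reserving the byte range [0x1800, 0x2800)
 * touches pages 1 and 2, so both pages are marked reserved even though
 * neither is fully covered.
 */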

/**
 * Frees a section of bootmemory.
 */
static void __init
free_bootmem_core(
	bootmem_data_t	*bdata,
	unsigned long	addr,
	unsigned long	size
)
{
	unsigned long i;
	unsigned long start;
	/*
	 * round down end of usable mem, partially free pages are
	 * considered reserved.
	 */
	unsigned long sidx;
	unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
	unsigned long end = (addr + size)/PAGE_SIZE;

	BUG_ON(!size);
	BUG_ON(end > bdata->node_low_pfn);

	if (addr < bdata->last_success)
		bdata->last_success = addr;

	/*
	 * Round up the beginning of the address.
	 */
	start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
	sidx = start - (bdata->node_boot_start/PAGE_SIZE);

	for (i = sidx; i < eidx; i++) {
		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
			BUG();
	}
}
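/*
 * Example of the opposite rounding, assuming 4 KB pages and
 * node_boot_start == 0: freeing the byte range [0x1800, 0x3800) rounds
 * the start up and the end down, so only page 2 (the single fully
 * covered page) is returned to the pool.
 */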

/**
 * We 'merge' subsequent allocations to save space. We might 'lose' some
 * fraction of a page if an allocation cannot be satisfied due to size
 * constraints on machines with fragmented physical RAM - in these cases
 * (mostly large-memory machines) this is not a problem.
 *
 * On low-memory machines we get it right in 100% of the cases.
 *
 * 'align' has to be a power of 2.
 *
 * NOTE:  This function is _not_ reentrant.
 */
void * __init
__alloc_bootmem_core(
	struct bootmem_data	*bdata,
	unsigned long		size,
	unsigned long		align,
	unsigned long		goal,
	unsigned long		limit
)
{
	unsigned long offset, remaining_size, areasize, preferred;
	unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
	void *ret;

	if (bootmem_destroyed)
		panic("The bootmem allocator has been destroyed.");

	if(!size) {
		printk("__alloc_bootmem_core(): zero-sized request\n");
		BUG();
	}
	BUG_ON(align & (align-1));

	if (limit && bdata->node_boot_start >= limit)
		return NULL;

	limit >>= PAGE_SHIFT;
	if (limit && end_pfn > limit)
		end_pfn = limit;

	eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
	offset = 0;
	if (align &&
	    (bdata->node_boot_start & (align - 1UL)) != 0)
		offset = (align - (bdata->node_boot_start & (align - 1UL)));
	offset >>= PAGE_SHIFT;

	/*
	 * We try to allocate bootmem pages above 'goal'
	 * first, then we try to allocate lower pages.
	 */
	if (goal && (goal >= bdata->node_boot_start) &&
	    ((goal >> PAGE_SHIFT) < end_pfn)) {
		preferred = goal - bdata->node_boot_start;

		if (bdata->last_success >= preferred)
			if (!limit || (limit > bdata->last_success))
				preferred = bdata->last_success;
	} else
		preferred = 0;

	preferred = ALIGN(preferred, align) >> PAGE_SHIFT;
	preferred += offset;
	areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
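	/* Step the scan by the alignment in pages, or one page at a time
	 * for sub-page alignments (the GNU "?:" extension defaults to 1). */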
	incr = align >> PAGE_SHIFT ? : 1;

restart_scan:
	for (i = preferred; i < eidx; i += incr) {
		unsigned long j;
		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
		i = ALIGN(i, incr);
		if (i >= eidx)
			break;
		if (test_bit(i, bdata->node_bootmem_map))
			continue;
		for (j = i + 1; j < i + areasize; ++j) {
			if (j >= eidx)
				goto fail_block;
			if (test_bit (j, bdata->node_bootmem_map))
				goto fail_block;
		}
		start = i;
		goto found;
	fail_block:
		i = ALIGN(j, incr);
	}

	if (preferred > offset) {
		preferred = offset;
		goto restart_scan;
	}
	return NULL;

found:
	bdata->last_success = start << PAGE_SHIFT;
	BUG_ON(start >= eidx);

	/*
	 * Is the next page of the previous allocation-end the start
	 * of this allocation's buffer? If yes then we can 'merge'
	 * the previous partial page with this allocation.
	 */
	if (align < PAGE_SIZE &&
	    bdata->last_offset && bdata->last_pos+1 == start) {
		offset = ALIGN(bdata->last_offset, align);
		BUG_ON(offset > PAGE_SIZE);
		remaining_size = PAGE_SIZE-offset;
		if (size < remaining_size) {
			areasize = 0;
			/* last_pos unchanged */
			bdata->last_offset = offset+size;
			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
						bdata->node_boot_start);
		} else {
			remaining_size = size - remaining_size;
			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
						bdata->node_boot_start);
			bdata->last_pos = start+areasize-1;
			bdata->last_offset = remaining_size;
		}
		bdata->last_offset &= ~PAGE_MASK;
	} else {
		bdata->last_pos = start + areasize - 1;
		bdata->last_offset = size & ~PAGE_MASK;
		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
	}

	/*
	 * Reserve the area now:
	 */
	for (i = start; i < start+areasize; i++)
		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
			BUG();
	memset(ret, 0, size);
	return ret;
}
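/*
 * A sketch of the merge behavior described above, assuming 4 KB pages:
 * two back-to-back calls such as
 *
 *	a = __alloc_bootmem_core(bdata, 100, 16, 0, 0);
 *	b = __alloc_bootmem_core(bdata, 100, 16, 0, 0);
 *
 * return pointers into the same page: the second allocation starts at
 * the 16-byte-aligned offset just past the first, rather than burning
 * a fresh page for each 100-byte request.
 */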

static void __init
free_all_bootmem_core(struct bootmem_data *bdata)
{
	unsigned long pfn;
	unsigned long vaddr;
	unsigned long i, m, count;
	unsigned long bootmem_total=0, kmem_total=0, umem_total=0;
	unsigned long kmem_max_idx, max_idx;
	unsigned long *map;
	struct pmem_region rgn;

	BUG_ON(!bdata->node_bootmem_map);

	kmem_max_idx = (kmem_size >> PAGE_SHIFT) - (bdata->node_boot_start >> PAGE_SHIFT);
	max_idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
	BUG_ON(kmem_max_idx > max_idx);

	/* Create the initial kernel managed memory pool (kmem) */
	count = 0;
	pfn = bdata->node_boot_start >> PAGE_SHIFT;  /* first extant page of node */
	map = bdata->node_bootmem_map;
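	/*
	 * Scan the bitmap one long-word at a time. Each word is inverted
	 * so that set bits in 'v' mark free pages; the mask 'm' then
	 * walks the word bit-by-bit.
	 */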
	for (i = 0; i < kmem_max_idx; ) {
		unsigned long v = ~map[i / BITS_PER_LONG];

		if (v) {
			vaddr = (unsigned long) __va(pfn << PAGE_SHIFT);
			for (m = 1; m && i < kmem_max_idx; m<<=1, vaddr+=PAGE_SIZE, i++) {
				if (v & m) {
					count++;
					kmem_add_memory(vaddr, PAGE_SIZE);
				}
			}
		} else {
			i+=BITS_PER_LONG;
		}
		pfn += BITS_PER_LONG;
	}
	BUG_ON(count == 0);

	/*
	 * At this point, kmem_alloc() will work. The physical memory tracking
	 * code relies on kmem_alloc(), so it cannot be initialized until now.
	 *
	 * Tell the physical memory tracking subsystem about the kernel-managed
	 * pool and the remaining memory that will be managed by user-space.
	 */
	pfn = bdata->node_boot_start >> PAGE_SHIFT;  /* first extant page of node */
	map = bdata->node_bootmem_map;
	pmem_region_unset_all(&rgn);
	rgn.type_is_set = true;
	rgn.allocated_is_set = true;
	rgn.lgroup_is_set = true;
	for (i = 0; i < max_idx; ) {
		unsigned long v = ~map[i / BITS_PER_LONG];
		unsigned long paddr = (unsigned long) pfn << PAGE_SHIFT;

		for (m = 1; m && i < max_idx; m<<=1, paddr+=PAGE_SIZE, i++) {
			rgn.start = paddr;
			rgn.end   = paddr + PAGE_SIZE;

			if (v & m) {
				if (i < kmem_max_idx) {
					rgn.type = PMEM_TYPE_KMEM;
					rgn.allocated = true;
					rgn.lgroup = 0;
					++kmem_total;
				} else {
					rgn.type = PMEM_TYPE_UMEM;
					rgn.allocated = false;
					rgn.lgroup = 0;
					++umem_total;
				}
			} else {
				rgn.type = PMEM_TYPE_BOOTMEM;
				rgn.allocated = true;
				rgn.lgroup = 0;
				++bootmem_total;
			}

			if (pmem_add(&rgn))
				BUG();
		}

		pfn += BITS_PER_LONG;
	}

	/*
	 * Now free the allocator bitmap itself, it's not
	 * needed anymore:
	 */
	vaddr = (unsigned long)bdata->node_bootmem_map;
	count = 0;
	pmem_region_unset_all(&rgn);
	rgn.type_is_set = true;
	rgn.allocated_is_set = true;
	rgn.lgroup_is_set = true;
	for (i = 0;
	     i < ((bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE;
	     i++, vaddr += PAGE_SIZE) {
		count++;

		rgn.start = __pa(vaddr);
		rgn.end   = rgn.start + PAGE_SIZE;

		/* bitmap pages below kmem_size belong to the kernel pool */
		if (rgn.start < kmem_size) {
			kmem_add_memory(vaddr, PAGE_SIZE);
			rgn.type = PMEM_TYPE_KMEM;
			rgn.allocated = true;
			rgn.lgroup = 0;
		} else {
			rgn.type = PMEM_TYPE_UMEM;
			rgn.allocated = false;
			rgn.lgroup = 0;
		}

		pmem_add(&rgn);
	}
	BUG_ON(count == 0);

	/* Mark the bootmem allocator as dead */
	bdata->node_bootmem_map = NULL;

	printk(KERN_DEBUG
	       "The boot-strap bootmem allocator has been destroyed:\n");
	printk(KERN_DEBUG
	       "  %lu bytes released to the kernel-managed memory pool (kmem)\n",
	       kmem_total << PAGE_SHIFT);
	printk(KERN_DEBUG
	       "  %lu bytes released to the user-managed memory pool (umem)\n",
	       umem_total << PAGE_SHIFT);
	printk(KERN_DEBUG
	       "  %lu bytes remain reserved as boot-time allocations\n",
	       bootmem_total << PAGE_SHIFT);
}

/**
 * Initialize boot memory allocator.
 */
unsigned long __init
init_bootmem(unsigned long start, unsigned long pages)
{
	return init_bootmem_core(&bootmem_data, start, 0, pages);
}

/**
 * Reserve a portion of the boot memory.
 * This prevents the reserved memory from being allocated.
 */
void __init
reserve_bootmem(unsigned long addr, unsigned long size)
{
	reserve_bootmem_core(&bootmem_data, addr, size);
}

/**
 * Return a portion of boot memory to the free pool.
 * Note that the region freed is the set of pages covering
 * the byte range [addr, addr+size).
 */
void __init
free_bootmem(unsigned long addr, unsigned long size)
{
	free_bootmem_core(&bootmem_data, addr, size);
}

void __init
free_all_bootmem(void)
{
	free_all_bootmem_core(&bootmem_data);
	bootmem_destroyed = true;
}

static void * __init
__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
{
	bootmem_data_t *bdata;
	void *ptr;

	list_for_each_entry(bdata, &bdata_list, list)
		if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
			return(ptr);
	return NULL;
}

/**
 * Allocate a chunk of memory from the boot memory allocator.
 *
 *     size  = number of bytes requested
 *     align = required alignment
 *     goal  = hint specifying address to start search.
 */
void * __init
__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
{
	void *mem = __alloc_bootmem_nopanic(size, align, goal);
	if (mem)
		return mem;
	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of memory");
	return NULL;
}

/**
 * Allocates a block of memory of the specified size.
 */
void * __init
alloc_bootmem(unsigned long size)
{
	return __alloc_bootmem(size, SMP_CACHE_BYTES, 0);
}

/**
 * Allocates a block of memory of the specified size and alignment.
 */
void * __init
alloc_bootmem_aligned(unsigned long size, unsigned long align)
{
	return __alloc_bootmem(size, align, 0);
}
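/*
 * Typical boot-time usage, e.g. for an early page table or stack (the
 * caller and sizes here are hypothetical):
 *
 *	struct boot_pgt *pgt = alloc_bootmem(sizeof(*pgt));
 *	void *stack = alloc_bootmem_aligned(4096, 4096);
 *
 * Both calls panic on failure, so the returned pointers need no NULL
 * check; the memory is already zeroed by __alloc_bootmem_core().
 */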

/**
 * Initializes the kernel memory subsystem.
 */
void __init
mem_subsys_init(void)
{
	/* We like powers of two */
	if (!is_power_of_2(kmem_size)) {
		printk(KERN_WARNING
		       "kmem_size must be a power of two; rounding up.\n");
		kmem_size = roundup_pow_of_two(kmem_size);
	}

	printk(KERN_DEBUG
	       "First %lu bytes of system memory reserved for the kernel.\n",
	       kmem_size);

	/* Initialize the kernel memory pool */
	kmem_create_zone(PAGE_OFFSET, kmem_size);
	free_all_bootmem();
	arch_memsys_init(kmem_size);
}

